// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/android/email_detector.h"

#include <memory>

#include "base/logging.h"
#include "base/strings/utf_string_conversions.h"
#include "content/public/renderer/android_content_detection_prefixes.h"
#include "net/base/escape.h"
#include "third_party/icu/source/i18n/unicode/regex.h"

namespace {

// Upper bound on the length of an email address.
constexpr size_t kMaximumEmailLength = 254;

// Pattern used to recognize email addresses.
// It is intentionally stricter than RFC 2822: uncommon special characters are
// rejected so that fewer false positives are reported.
// The pattern is delimited by word boundaries (\b), which lets an address be
// matched even when it is surrounded by punctuation, quote marks etc.
// Only upper-case letter ranges are listed, so the pattern is expected to be
// compiled with case-insensitive matching enabled.
constexpr char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9-]+(\\.[A-Z0-9-]+)*(\\.[A-Z]{2,6})\\b";

} // namespace

namespace content {

// The constructor performs no work of its own, so the compiler-generated
// definition is used.
EmailDetector::EmailDetector() = default;

// Returns the maximum length of content this detector looks for, i.e. the
// maximum email address length (kMaximumEmailLength).
size_t EmailDetector::GetMaximumContentLength()
{
    return kMaximumEmailLength;
}

// Builds the intent URL for a piece of detected content.
//
// For non-empty |content_text| the result is a GURL constructed from
// kEmailPrefix followed by |content_text| escaped with
// net::EscapeQueryParamValue (called with true as its second argument).
// For empty |content_text| a default-constructed GURL is returned.
GURL EmailDetector::GetIntentURL(const std::string& content_text)
{
    if (!content_text.empty()) {
        const std::string escaped_address = net::EscapeQueryParamValue(content_text, true);
        return GURL(kEmailPrefix + escaped_address);
    }

    // Nothing to link to.
    return GURL();
}

// Searches the UTF-16 text in [begin, end) for the first substring matching
// kEmailRegex (case-insensitively).
//
// On success returns true and fills in all three output parameters:
//   |start_pos|    - index of the first matched code unit, relative to |begin|.
//   |end_pos|      - index just past the last matched code unit, relative to
//                    |begin|.
//   |content_text| - the matched text converted to UTF-8.
//
// Returns false when nothing matches or when ICU reports an error (pattern
// compilation or match extraction). In that case none of the output
// parameters are written.
bool EmailDetector::FindContent(const base::string16::const_iterator& begin,
    const base::string16::const_iterator& end,
    size_t* start_pos,
    size_t* end_pos,
    std::string* content_text)
{
    // The UnicodeString constructor takes a pointer and a length, so the
    // iterator range is materialized into a string first.
    const base::string16 utf16_input(begin, end);
    const icu::UnicodeString pattern(kEmailRegex);
    const icu::UnicodeString input(utf16_input.data(), utf16_input.length());

    // |input| is declared before |matcher| and therefore outlives it; the
    // matcher refers to the input string rather than copying it.
    UErrorCode status = U_ZERO_ERROR;
    icu::RegexMatcher matcher(pattern, input, UREGEX_CASE_INSENSITIVE, status);
    if (!U_SUCCESS(status))
        return false; // The pattern could not be compiled / matcher not set up.

    if (!matcher.find())
        return false;

    // ICU functions taking a UErrorCode do nothing once |status| holds a
    // failure, so the three calls can share a single check afterwards.
    const int32_t match_start = matcher.start(status);
    const int32_t match_end = matcher.end(status);
    const icu::UnicodeString matched_text(matcher.group(status));

    // Loud in debug builds, graceful in release builds: never publish the -1
    // sentinel offsets (which would wrap to huge size_t values) to the caller.
    DCHECK(U_SUCCESS(status));
    if (!U_SUCCESS(status))
        return false;

    *start_pos = static_cast<size_t>(match_start);
    *end_pos = static_cast<size_t>(match_end);
    base::UTF16ToUTF8(matched_text.getBuffer(), matched_text.length(),
        content_text);
    return true;
}

} // namespace content
